##########################################################################################################################################
###### This file prepares the merger dataset further. You need to first have saved
# the file "Merger_Interm_save.RData". Furthermore, you need additional raw data as
# detailed below.
###############################

#####
# this is a file containing information from Compustat. The file includes
# the following data items from Compustat:
# - gvkey
# - datadate
# - fyear
# - indfmt
# - consol
# - popsrc
# - datafmt
# - tic
# - cusip
# - curcd
# - costat
# - naicsh
# - sich
# Exactly in this order.
file_path <- "Historical_NAICS_SIC.txt"
####

# Import the text file using read.table()
compustat <- read.delim(file_path)
compustat<-compustat[,c(1,2,9,12,13)]
compustat<-unique(compustat)
compustat$cus <- substr(compustat$cusip, 1, 6)
compustat$nai <- substr(compustat$naicsh, 1, 3)
seg2<-compustat[,c(1,2,5,6,7)]
seg2<-seg2[!is.na(seg2$nai)|!is.na(seg2$sic),]

load("Merger_Interm_save.RData")
Mergers_New$Ac_ind<-NA
Mergers_New$TAR_ind<-NA
Mergers_New$Ac_ind2<-NA
Mergers_New$TAR_ind2<-NA
Mergers_New$Ac_ind25<-NA
Mergers_New$TAR_ind25<-NA
Mergers_New$Ac_ind50<-NA
Mergers_New$TAR_ind50<-NA
Mergers_New$Ac_ind100<-NA
Mergers_New$TAR_ind100<-NA
Mergers_New$Ac_ind200<-NA
Mergers_New$TAR_ind200<-NA
Mergers_New$Ac_ind300<-NA
Mergers_New$TAR_ind300<-NA
Mergers_New$Ac_ind400<-NA
Mergers_New$TAR_ind400<-NA
Mergers_New$Ac_ind500<-NA
Mergers_New$TAR_ind500<-NA
Mergers_New$similarity<-NA
Mergers_New$Ac_gvkey<-NA
Mergers_New$TAR_gvkey<-NA
for(i in 1:nrow(Mergers_New)){
  tgt<-Mergers_New$Acquiror.6.digit.CUSIP[i]
  q<-seg2[seg2$cus==tgt,]
  if(nrow(q)>0){
    q$dist<-as.Date(Mergers_New$Date.Announced[i])-as.Date(q$datadate)
    q<-q[q$dist>=0,]
    q<-q[order(q$dist),]
    Mergers_New$Ac_ind[i]<-q$sich[1]
    Mergers_New$Ac_ind2[i]<-q$nai[1]
    Mergers_New$Ac_gvkey[i]<-q$gvkey[1]
  }
  ####
  tgt<-Mergers_New$Target.6.digit.CUSIP[i]
  q<-seg2[seg2$cus==tgt,]
  if(nrow(q)>0){
    q$dist<-as.Date(Mergers_New$Date.Announced[i])-as.Date(q$datadate)
    q<-q[q$dist>=0,]
    q<-q[order(q$dist),]
    Mergers_New$TAR_ind[i]<-q$sich[1]
    Mergers_New$TAR_ind2[i]<-q$nai[1]
    Mergers_New$TAR_gvkey[i]<-q$gvkey[1]
  }
}
# get the text-based industry classifications
df <- read.table("fic_data.txt",sep="\t",header=TRUE)
Mergers_New$year=substr(Mergers_New$Date.Announced,1,4)
for(i in 1:nrow(Mergers_New)){
  if(!is.na(Mergers_New$Ac_gvkey[i])){
    q=df[df$gvkey==Mergers_New$Ac_gvkey[i],]
    q$dist<-as.numeric(Mergers_New$year[i])-q$year
    q<-q[q$dist>=0,]
    q<-q[order(q$dist),]
    Mergers_New$Ac_ind25[i]<-q$icode25[1]
    Mergers_New$Ac_ind50[i]<-q$icode50[1]
    Mergers_New$Ac_ind100[i]<-q$icode100[1]
    Mergers_New$Ac_ind200[i]<-q$icode200[1]
    Mergers_New$Ac_ind300[i]<-q$icode300[1]
    Mergers_New$Ac_ind400[i]<-q$icode400[1]
    Mergers_New$Ac_ind500[i]<-q$icode500[1]
  }
  if(!is.na(Mergers_New$TAR_gvkey[i])){
    q=df[df$gvkey==Mergers_New$TAR_gvkey[i],]
    q$dist<-as.numeric(Mergers_New$year[i])-q$year
    q<-q[q$dist>=0,]
    q<-q[order(q$dist),]
    Mergers_New$TAR_ind25[i]<-q$icode25[1]
    Mergers_New$TAR_ind50[i]<-q$icode50[1]
    Mergers_New$TAR_ind100[i]<-q$icode100[1]
    Mergers_New$TAR_ind200[i]<-q$icode200[1]
    Mergers_New$TAR_ind300[i]<-q$icode300[1]
    Mergers_New$TAR_ind400[i]<-q$icode400[1]
    Mergers_New$TAR_ind500[i]<-q$icode500[1]
  }
}
####
# this is a file from the Hoberg Philips data repository
# it can be found here: https://hobergphillips.tuck.dartmouth.edu/idata/tnic3_data.zip
# download it and unzip to your path.
tmp <- read.table("tnic3_data.txt",sep="\t",header=TRUE)
####

# according to the description, the similarity threshold is already applied when defining the TNIC3.
# Thus, all missing values are not related and those with a score are defined as competitors.
Mergers_New$year=substr(Mergers_New$Date.Announced,1,4)
for(i in 1:nrow(Mergers_New)){
  if(!is.na(Mergers_New$Ac_gvkey[i])&!is.na(Mergers_New$TAR_gvkey[i])){
    q=tmp[tmp$gvkey1==Mergers_New$Ac_gvkey[i]&tmp$gvkey2==Mergers_New$TAR_gvkey[i],,drop=FALSE]
    if(nrow(q)>0){
      q$dist<-as.numeric(Mergers_New$year[i])-q$year
      q<-q[q$dist>=0,]
      q<-q[order(q$dist),]
      Mergers_New$similarity[i]<-q$score[1]
    }
    q=tmp[tmp$gvkey2==Mergers_New$Ac_gvkey[i]&tmp$gvkey1==Mergers_New$TAR_gvkey[i],,drop=FALSE]
    if(nrow(q)>0){
      q$dist<-as.numeric(Mergers_New$year[i])-q$year
      q<-q[q$dist>=0,]
      q<-q[order(q$dist),]
      Mergers_New$similarity[i]<-q$score[1]
    }
  }
}

save(Mergers_New,file="Mergers_with_SIC_NEW.RData")